*File description: This file takes the Harmonized SHARE data as input and prepares a data file for the main analysis

clear
set more off
pause on

*Folder holding the input files and the output file of this script
global working_data "$path/data"

********************************************************************************
*Load data and keep variables relevant for the analysis in long format
********************************************************************************

use "$working_data/H_SHARE_d.dta", clear

	*Household income for wave 1.
	*In SHARE wave 1 income variables were collected before taxes and social
	*contributions, while subsequent waves collected only after-tax income
	*variables. To solve this issue Bertoni et al. (2016) have estimated
	*harmonized post-tax estimates for SHARE wave 1, which we use in this paper.
	*See: Bertoni, M., A. Bonfatti, C. Dal Bianco, G. Weber, and F. Zantomio,
	*"Harmonized net income measures in SHARE Wave 1," SHARE working paper
	*series 25-2016, 2016.

	*Attach ytotn from the wave-1 gross/net file; no _merge variable is kept
	merge 1:1 mergeid using "$working_data/sharew1_rel6-0-0_ALL_datasets_stata/sharew1_rel6-0-0_gv_grossnet.dta", ///
		keepusing(ytotn) nogenerate

	*Wave-1 household total: sum of ytotn within h1coupid, left missing for
	*observations whose own ytotn is system missing
	egen h1ittot = total(ytotn) if ytotn!=., by(h1coupid)

*Variables retained for the analysis, grouped by theme (r?/h?/hh? match one wave digit)
local id_vars      nmergeid country isocountry
local fixed_vars   ragender rabyear rabmonth raedyrs raedisced
local survey_vars  r?iwy r?iwm r?agey r?agem r?lbrf_s r?mstat
local health_vars  r?eurod r?adla r?iadlza r?socwk r?socyr
local work_vars    r?jhours r?jhour2 r?work
*Income components named with the "i" infix
local inc_i_vars   r?iearn r?isemp h?irent r?ipena r?issdi r?isret r?ipubpen r?igxfr r?iothr
*Income components named with the "it" infix, plus household totals
local inc_it_vars  r?itearn r?itsemp h?itrent h?itrest h?ittrest r?itpena r?itssdi r?itsret ///
	r?itpubpen r?itgxfr r?itothr h?ittot
local pension_vars r?pubpen r?pubage
local wealth_vars  h?atotb hh?atotb h?atotn hh?atotn

keep `id_vars' `fixed_vars' `survey_vars' `health_vars' `work_vars' ///
	`inc_i_vars' `inc_it_vars' `pension_vars' `wealth_vars'

	
*Give the wave-1 income variables the same names as the later waves
*(insert a "t" after the leading "r1i"/"h1i") so that they reshape under a common stub
rename (r1iearn  r1isemp  h1irent  h1itrest  r1ipena  r1issdi  r1isret  r1ipubpen  r1igxfr  r1iothr) ///
       (r1itearn r1itsemp h1itrent h1ittrest r1itpena r1itssdi r1itsret r1itpubpen r1itgxfr r1itothr)

*Stubs to reshape from wide (one variable per wave) to long (one row per person-wave);
*the @ marks where the wave number sits in the wide variable name.
*NOTE(review): no r?delta_eurod variable appears in the keep list above, so the
*r@delta_eurod stub has no source variables in this file - confirm it is intended.
local long_stubs r@iwy r@iwm r@lbrf_s r@agey r@agem r@eurod ///
	r@itearn r@itsemp h@itrent h@ittrest r@itpena r@itssdi r@itsret ///
	r@itpubpen r@pubpen r@itgxfr r@itothr ///
	r@delta_eurod ///
	r@jhours r@jhour2 r@work ///
	h@ittot r@mstat r@adla r@iadlza r@socwk r@socyr r@pubage ///
	h@atotb hh@atotb h@atotn hh@atotn

reshape long `long_stubs', i(nmergeid) j(wave)
	
*Replace the value label on wave with plain wave numbers
label drop agecl
label define surveywave 1 "1" 2 "2" 4 "4" 5 "5" 6 "6"
label values wave surveywave

*Interview year is used as the calendar-year variable
rename riwy year

*Only keep 10 countries for the sample (selected by the isocountry codes listed here)
keep if inlist(isocountry, 208, 752, 40, 56, 250, 276, 528, 756, 380, 724)
	
********************************************************************************

********************************************************************************
* Generate miscellaneous variables
********************************************************************************	
*Depression indicator: 1 if the EURO-D score lies in 4-12, 0 for any other
*non-missing score, and missing when EURO-D is missing.
*missing() is used instead of "!=." because extended missing values
*(.d, .m, .r, ...) are not equal to "." and would otherwise be coded 0.
	gen depression=0 if !missing(reurod)
	replace depression=1 if inrange(reurod,4,12)

*Self-reported labour market status: copy of rlbrf_s in which the
	*missing codes .d, .m, .o and .r are collected in a single "Undefined" category (9)
	gen self_lf = rlbrf_s
	foreach miss_code in .d .m .o .r {
		replace self_lf = 9 if rlbrf_s == `miss_code'
	}
	label define self_LMS 1 "(Self-)employed" 3 "Unemployed" 5 "Retired" ///
		6 "Disabled" 8 "Homemaker" 9 "Undefined"
	label values self_lf self_LMS

*Income variables: household total income in units of 10,000 and in natural logs
	gen hh_inc    = hittot/10000
	gen log_hhinc = ln(hittot)

*Define binary variables for educational attainment
*One dummy per raedisced code 0-6, in the order listed; each dummy is 0 whenever
*raedisced differs from its code, including when raedisced is missing
	local isced_code = 0
	foreach educ_dummy in educ_no educ_primary educ_sec_low educ_sec_up ///
		educ_post_sec educ_tert_first educ_tert_sec {
		gen `educ_dummy' = (raedisced == `isced_code')
		local ++isced_code
	}
		
*Define binary variables for marital status
*Each dummy equals 1 for its rmstat code and 0 for every other rmstat value;
*it is left missing when rmstat is one of the missing codes . .d .r .u .v .m
	local mar_names married partner separ divor widow never
	local mar_codes 1 3 4 5 7 8
	forvalues k = 1/6 {
		local mar_name : word `k' of `mar_names'
		local mar_code : word `k' of `mar_codes'
		gen mar_`mar_name' = (rmstat==`mar_code') ///
			if rmstat!=. & rmstat!=.d & rmstat!=.r & rmstat!=.u & rmstat!=.v & rmstat!=.m
	}

*Country variable: consecutive integer id (1, 2, ...) in ascending isocountry order
egen d_country=group(isocountry)

*Create count variable for years: consecutive integer id in ascending year order
egen d_year=group(year)	

*Indicator for social activities based on rsocwk and rsocyr:
*1 if either variable equals 1, 0 if at least one of them is observed and neither
*equals 1, missing if both are missing.
*missing() is used instead of "!=." so that extended missing values (.d, .m, ...)
*do not count as observed answers.
gen social_activities = 0 if !missing(rsocwk) | !missing(rsocyr)
replace social_activities = 1 if rsocwk==1 | rsocyr==1

	
********************************************************************************
* Labor-market definitions: defined using 4 attributes for (i) retirement income, (ii) earnings, (iii) hours worked, and (iv) DI income
********************************************************************************			
*Generate 4 attributes
*All missing checks below use missing() rather than "!=.": in Stata every missing
*value (., .a, ..., .z) is larger than any number and extended missing values are
*not equal to ".", so a test such as "x>0 & x!=." is TRUE when x is .d/.m/.r/.w and
*would wrongly flag missing income or hours as positive.

	*Any retirement income (public or private)
	*1 if either component is positive, 0 if both are observed and neither is positive
	gen d_ret_inc = 0 if !missing(ritpena) & !missing(ritsret)
	replace d_ret_inc=1 if ((ritpena>0 & !missing(ritpena)) | (ritsret>0 & !missing(ritsret)))
	
	*Any income from work or self-employment
	*1 if either component is positive, 0 if both are observed and neither is positive
	gen d_earn_inc = 0 if !missing(ritearn) & !missing(ritsemp)
	replace d_earn_inc=1 if ((ritearn>0 & !missing(ritearn)) | (ritsemp>0 & !missing(ritsemp)))
	
	*10 or more working hours per week
	*Hours in the main job, plus hours in a second job when those are observed and
	*non-zero; set to 0 for respondents with rwork==0
	gen work_hours=rjhours
	replace work_hours=rjhours+rjhour2 if !missing(rjhour2) & rjhour2!=0
	replace work_hours=0 if rwork==0
	
	*d_works_10 is defined for every observation: missing hours are treated as
	*"not working 10+ hours" (0), as in the labor-market classification that follows
	gen d_works_10 = 0 
	replace d_works_10=1 if work_hours>=10 & !missing(work_hours)

	*Any disability insurance (DI) income
	gen d_di_inc = 0 if !missing(ritssdi)
	replace d_di_inc=1 if ritssdi>0 & !missing(ritssdi)
	
*Construct variable for labor-market status using 4 attributes
*The status is assigned only when d_ret_inc, d_di_inc and d_earn_inc are all 0 or 1;
*it stays missing if any of the three is missing
gen c_lms_new = .
local attrs_known "inlist(d_ret_inc,0,1) & inlist(d_di_inc,0,1) & inlist(d_earn_inc,0,1)"

	*Retired: retirement income and fewer than 10 hours, whatever DI income and earnings
	replace c_lms_new = 1 if `attrs_known' & d_ret_inc==1 & d_works_10==0

	*Employed: earnings and 10+ hours, no retirement or DI income
	replace c_lms_new = 2 if d_ret_inc==0 & d_di_inc==0 & d_earn_inc==1 & d_works_10==1

	*Unemployed: no retirement or DI income and fewer than 10 hours, with or without earnings
	replace c_lms_new = 3 if `attrs_known' & d_ret_inc==0 & d_di_inc==0 & d_works_10==0

	*Disabled: DI income, no retirement income and fewer than 10 hours, with or without earnings
	replace c_lms_new = 4 if `attrs_known' & d_ret_inc==0 & d_di_inc==1 & d_works_10==0

	*Other: every remaining combination with 10+ hours, i.e. all but the "Employed" cell
	replace c_lms_new = 9 if `attrs_known' & d_works_10==1 ///
		& !(d_ret_inc==0 & d_di_inc==0 & d_earn_inc==1)

	label define c_lms_new_label 1 "Retired" 2 "Employed" 3 "Unemployed" 4 "Disabled" ///
		9 "Other" 
	label values c_lms_new c_lms_new_label
********************************************************************************
	
	
********************************************************************************
* Early Retirement Age (ERA) in months
********************************************************************************
*Early retirement age in months; filled country by country below
gen era_months = .

*******************
*Austria
*******************

*Men
replace era_months = 60*12 if isocountry==40 & ragender==1

*Women
replace era_months = 55*12 if isocountry==40 & ragender==2

*******************
*Belgium
*******************

*Men and women: 60 for birth years up to 1952, then increasing in half-year steps
*NOTE(review): a missing rabmonth satisfies "rabmonth>=7" in Stata, so such
*observations fall into the second-half-year rows - confirm this is acceptable.
replace era_months = 60*12 if isocountry==56 & rabyear<=1952
replace era_months = 60.5*12 if isocountry==56 & rabyear==1953 & rabmonth<=6
replace era_months = 61*12 if isocountry==56 & rabyear==1953 & rabmonth>=7
replace era_months = 61.5*12 if isocountry==56 & rabyear==1954 & rabmonth<=6
replace era_months = 62*12 if isocountry==56 & rabyear==1954 & rabmonth>=7
replace era_months = 62*12 if isocountry==56 & rabyear==1955
*The last row previously used rabyear>=1955, which overwrote the 1955 row above
*and left it without effect; it now starts at 1956 so the 1955 cohort keeps 62.
*NOTE(review): confirm against the Belgian rules that 62 is the intended value for 1955.
replace era_months = 63*12 if isocountry==56 & rabyear>=1956

*******************
*Denmark
*******************

*Men and women
local dk "isocountry==208"
replace era_months = 60*12 if `dk' & rabyear<=1953

*Birth years 1954-1956: half-year steps, first half-year (months <=6) and second half-year (months >=7)
forvalues byear = 1954/1956 {
	local shift = `byear' - 1954
	replace era_months = (60.5 + `shift')*12 if `dk' & rabyear==`byear' & rabmonth<=6
	replace era_months = (61 + `shift')*12 if `dk' & rabyear==`byear' & rabmonth>=7
}

replace era_months = 63*12 if `dk' & inrange(rabyear, 1957, 1958)
replace era_months = 63.5*12 if `dk' & rabyear==1959 & rabmonth<=6
replace era_months = 64*12 if `dk' & ((rabyear==1959 & rabmonth>=7) | rabyear>=1960)

*******************
*France
*******************

*Men and women: a single value for all cohorts
replace era_months = 56*12 if isocountry==250 

*******************
*Germany
*******************

*Men
*Up to birth year 1945: 60 years
replace era_months = 60*12 if isocountry==276 & ragender==1 & rabyear<=1945

*Birth years 1946-1948: one additional month per month of birth,
*i.e. 60 years + m months for 1946, 61 years + m for 1947, 62 years + m for 1948
*(rows are matched on exact month values 1-12, so a missing rabmonth gets no value here)
forvalues byear = 1946/1948 {
	local base_years = 60 + `byear' - 1946
	forvalues bmonth = 1/12 {
		replace era_months = `base_years'*12 + `bmonth' ///
			if isocountry==276 & ragender==1 & rabyear==`byear' & rabmonth==`bmonth'
	}
}

*From birth year 1949: 63 years
replace era_months = 63*12 if isocountry==276 & ragender==1 & rabyear>=1949

*Women
replace era_months = 60*12 if isocountry==276 & ragender==2 & rabyear<=1951
replace era_months = 63*12 if isocountry==276 & ragender==2 & rabyear>=1952

*******************
*Italy
*******************

*Men: 57 up to birth year 1950, one more year for each of 1951 and 1952, 60 from 1953
replace era_months = 57*12 if isocountry==380 & ragender==1 & rabyear<=1950
forvalues byear = 1951/1952 {
	replace era_months = (57 + `byear' - 1950)*12 if isocountry==380 & ragender==1 & rabyear==`byear'
}
replace era_months = 60*12 if isocountry==380 & ragender==1 & rabyear>=1953

*Women: a single value for all cohorts
replace era_months = 57*12 if isocountry==380 & ragender==2

*******************
*Netherlands
*******************

*Men and women: 60 up to birth year 1949, 62 afterwards
replace era_months = 60*12 if isocountry==528 & rabyear<=1949
replace era_months = 62*12 if isocountry==528 & rabyear>=1950


*******************
*Spain
*******************

*Men and women
*Each row assigns one ERA value to a birth-date window; windows that span a
*year change are written as (end of one birth year) | (start of the next)
local es "isocountry==724"
replace era_months = 60*12      if `es' & rabyear<=1950
replace era_months = 60*12 + 6  if `es' & rabyear==1951 & rabmonth<=6
replace era_months = 60*12 + 8  if `es' & ((rabyear==1951 & rabmonth>=7)  | (rabyear==1952 & rabmonth<=4))
replace era_months = 61*12 + 1  if `es' & rabyear==1952 & rabmonth>=5 & rabmonth<=11
replace era_months = 61*12 + 2  if `es' & ((rabyear==1952 & rabmonth==12) | (rabyear==1953 & rabmonth<=10))
replace era_months = 61*12 + 3  if `es' & ((rabyear==1953 & rabmonth>=11) | (rabyear==1954 & rabmonth<=9))
replace era_months = 61*12 + 4  if `es' & ((rabyear==1954 & rabmonth>=10) | (rabyear==1955 & rabmonth<=8))
replace era_months = 61*12 + 5  if `es' & ((rabyear==1955 & rabmonth>=9)  | (rabyear==1956 & rabmonth<=7))
replace era_months = 61*12 + 6  if `es' & ((rabyear==1956 & rabmonth>=8)  | (rabyear==1957 & rabmonth<=6))
replace era_months = 61*12 + 8  if `es' & ((rabyear==1957 & rabmonth>=7)  | (rabyear==1958 & rabmonth<=4))
replace era_months = 61*12 + 10 if `es' & ((rabyear==1958 & rabmonth>=5)  | (rabyear==1959 & rabmonth<=2))
replace era_months = 62*12      if `es' & rabyear==1959 & rabmonth>=3
replace era_months = 62*12 + 2  if `es' & rabyear==1960 & rabmonth<=10
replace era_months = 62*12 + 4  if `es' & ((rabyear==1960 & rabmonth>=11) | (rabyear==1961 & rabmonth<=8))
replace era_months = 62*12 + 6  if `es' & ((rabyear==1961 & rabmonth>=9)  | (rabyear==1962 & rabmonth<=6))
replace era_months = 62*12 + 8  if `es' & ((rabyear==1962 & rabmonth>=7)  | (rabyear==1963 & rabmonth<=4))
replace era_months = 62*12 + 10 if `es' & ((rabyear==1963 & rabmonth>=5)  | (rabyear==1964 & rabmonth<=2))
replace era_months = 63*12      if `es' & ((rabyear==1964 & rabmonth>=3)  | rabyear>=1965)

*******************
*Sweden
*******************

*Men and women
replace era_months = 61*12 if isocountry==752

*******************
*Switzerland
*******************

*Men
*The two rows previously overlapped at birth year 1936 (<=1936 and >=1936) and
*the second row won, so 1936 received 63. The cut-off is now written without
*overlap and gives the same result.
*NOTE(review): confirm that 63 (not 64) is the intended value for birth year 1936.
replace era_months = 64*12 if isocountry==756 & ragender==1 & rabyear<=1935
replace era_months = 63*12 if isocountry==756 & ragender==1 & rabyear>=1936

*Women
replace era_months = 62*12 if isocountry==756 & ragender==2

*******************
*Remove observations with no birth year 
*Missing birth years satisfy every "rabyear>=..." condition used for the ERA rows
*(missing values sort above all numbers), so they were assigned a value there.
*missing() clears all of them, not only the . and .d codes.
replace era_months = . if missing(rabyear)

********************************************************************************

			
*Add labels

*Value labels
label define genderX 1 "Men" 2 "Women"
label values ragender genderX

*Country names keyed by isocountry code
label define sample_countries 40 "Austria" 56 "Belgium" 208 "Denmark" 250 "France" ///
	276 "Germany" 300 "Greece" 380 "Italy" 528 "Netherlands" 724 "Spain" 752 "Sweden" 756 "Switzerland"
label values isocountry sample_countries

*Country names keyed by d_country (group ids follow ascending isocountry order)
label define sample_countries2 1 "Austria" 2 "Belgium" 3 "Denmark" 4 "France" ///
	5 "Germany" 6 "Italy" 7 "Netherlands" 8 "Spain" 9 "Sweden" 10 "Switzerland"
label values d_country sample_countries2

*Variable labels: survey, health and income variables
label variable wave       "Wave"
label variable isocountry "Country"
label variable d_country  "Country"
label variable ragey      "Age"
label variable reurod     "EURO-D"
label variable hittot     "Hh income (EUR)"
label variable self_lf    "Self-reported"
label variable radla      "ADL"
label variable riadlza    "IADL"

*Variable labels: education dummies
label variable educ_no         "No"
label variable educ_primary    "Primary"
label variable educ_sec_low    "Lower secondary"
label variable educ_sec_up     "Upper secondary"
label variable educ_post_sec   "Post-secondary"
label variable educ_tert_first "First stage tertiary"
label variable educ_tert_sec   "Second stage tertiary"

*Variable labels: marital-status dummies
label variable mar_married "Married"
label variable mar_partner "Partnered"
label variable mar_separ   "Separated"
label variable mar_divor   "Divorced"
label variable mar_widow   "Widowed"
label variable mar_never   "Never married"


********************************************************************************
*Save data
********************************************************************************
*Write the analysis file, overwriting any existing copy
save "$working_data/SHARE_JOLE_replication.dta", replace
********************************************************************************
